package data_deepprocessing.algorithm.cvalue_ncvalue_tfidf.util;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;

import data_deepprocessing.algorithm.cvalue_ncvalue_tfidf.bean.CValueBean;
import uk.ac.shef.dcs.oak.jate.JATEException;
import uk.ac.shef.dcs.oak.jate.core.algorithm.CValueFeatureWrapper;
import uk.ac.shef.dcs.oak.jate.core.feature.FeatureCorpusTermFrequency;
import uk.ac.shef.dcs.oak.jate.core.feature.FeatureTermNest;
import uk.ac.shef.dcs.oak.jate.test.AlgorithmTester;


/**
 * Collects seed-term information from a list of {@link CValueBean}s and feeds it
 * to the JATE C-Value algorithm.
 *
 * <p>Most of the collected state lives in {@code static} fields and is therefore
 * shared by every instance. Constructing a new {@code CValueTool} replaces that
 * shared state with the data of the beans passed to the constructor. No
 * synchronization is performed in this class.
 *
 * @author YUANYUHU
 */
public class CValueTool {

	/**
	 * Seed term contents, in the order the beans were supplied.
	 */
	protected static List<String> seed_contents = new ArrayList<>();
	/**
	 * Maps seed content to seed id.
	 */
	protected static HashMap<String, Integer> term_index_map = new HashMap<>();
	/**
	 * Maps seed id to seed content.
	 */
	protected static HashMap<Integer, String> index_term_map = new HashMap<>();
	/**
	 * Maps seed id to the seed's frequency count.
	 */
	protected static HashMap<Integer, Integer> index_frequency_map = new HashMap<>();
	/**
	 * Sum of the frequency counts of all seed terms.
	 */
	protected static Integer summary_frequency = 0;
	/**
	 * Containment relation between seeds: for each seed id, the ids of all other
	 * seeds whose content contains this seed's content.
	 * <pre>
	 * e.g.
	 * 0  yuanyuhu
	 * 1  yuan
	 * 2  yu
	 *
	 * 0 -> {}
	 * 1 -> {0}
	 * 2 -> {0, 1}
	 * </pre>
	 */
	protected Map<Integer, Set<Integer>> index_connectIndexSet_Map = new ConcurrentHashMap<Integer, Set<Integer>>();

	/**
	 * Builds the term/frequency tables and the nesting relation from the given beans.
	 *
	 * @param cValueBeans seed terms with their ids and frequency counts; must not be null
	 * @throws NullPointerException if {@code cValueBeans} is null
	 */
	public CValueTool(List<CValueBean> cValueBeans) {
		initialize(cValueBeans);
	}

	/**
	 * Loads the bean data into the shared tables, then derives the nesting relation.
	 *
	 * @param cValueBeans seed terms to load
	 */
	private void initialize(List<CValueBean> cValueBeans){
		middleware(cValueBeans);
		generateNest(seed_contents);
	}

	/**
	 * Extracts content, id and frequency from every bean into the shared tables.
	 *
	 * <p>The shared tables are replaced with fresh collections first, so data left
	 * over from a previously constructed instance is neither duplicated nor counted
	 * twice. New collections are installed (rather than clearing the old ones) so
	 * that maps already handed out by an earlier instance are left untouched.
	 *
	 * @param cValueBeans seed terms to load; must not be null
	 */
	private void middleware(List<CValueBean> cValueBeans){
		// Fail before touching shared state so a bad call cannot wipe valid data.
		if (cValueBeans == null) {
			throw new NullPointerException("cValueBeans must not be null");
		}

		seed_contents = new ArrayList<>();
		term_index_map = new HashMap<>();
		index_term_map = new HashMap<>();
		index_frequency_map = new HashMap<>();
		summary_frequency = 0;

		for (CValueBean bean : cValueBeans) {
			term_index_map.put(bean.getSeed_Content(), bean.getSeed_id());
			index_frequency_map.put(bean.getSeed_id(), bean.getSeed_freqcount());
			index_term_map.put(bean.getSeed_id(), bean.getSeed_Content());
			seed_contents.add(bean.getSeed_Content());
			summary_frequency += bean.getSeed_freqcount();
		}
	}

	/**
	 * Builds the containment relation between seed strings.
	 *
	 * <p>For every seed, records the ids of all other seeds whose content contains
	 * it as a substring (a seed is never listed as containing itself).
	 *
	 * @param seed_contents the seed strings to compare pairwise
	 */
	private void generateNest(List<String> seed_contents){
		for (String seedContent : seed_contents) {
			Set<Integer> containingIds = new HashSet<>();
			for (String candidate : seed_contents) {
				if (candidate.contains(seedContent) && !candidate.equals(seedContent)) {
					containingIds.add(term_index_map.get(candidate));
				}
			}
			index_connectIndexSet_Map.put(term_index_map.get(seedContent), containingIds);
		}
	}

	/**
	 * Wraps a term-to-id map in a {@link GlobalIndexYYH}.
	 *
	 * @param term_index_map mapping from seed content to seed id
	 * @return a new index whose term/id map is set to {@code term_index_map}
	 */
	public GlobalIndexYYH getGlobalIndex(HashMap<String, Integer> term_index_map){
		GlobalIndexYYH termIndex = new GlobalIndexYYH();
		termIndex.setTermIdMap(term_index_map);
		return termIndex;
	}

	/**
	 * Runs the given tester with {@code output_path} as its second argument.
	 *
	 * <p>It is best to consult the JATE source to see how this is implemented; the
	 * source also ships an example (YYH_TestCvalue) that can be followed.
	 *
	 * <p>Failures are reported best-effort: a {@link JATEException} or
	 * {@link IOException} is printed to standard error and not rethrown.
	 *
	 * @param tester      tester on which the algorithm has been registered
	 * @param output_path output location passed through to the tester
	 */
	public void  printCValueResult(AlgorithmTester tester,String output_path){
		try {
			tester.execute(null, output_path);
		} catch (JATEException e) {
			e.printStackTrace();
		} catch (IOException e) {
			e.printStackTrace();
		}
	}

	/**
	 * Assembles the C-Value features from the collected seed data, registers the
	 * algorithm on a new {@link AlgorithmTester} and executes it.
	 *
	 * @return the executed tester; returned so that NC-Value can reuse it
	 * @throws IOException   if thrown by the JATE calls made here
	 * @throws JATEException if thrown by the JATE calls made here
	 */
	public AlgorithmTester CVAlgorithmInvoking() throws JATEException, IOException{
		// Term index (content -> id).
		GlobalIndexYYH termIndex = getGlobalIndex(term_index_map);

		// Corpus frequency feature: overall total plus per-seed counts.
		FeatureCorpusTermFrequency termCorpusFreq = new FeatureCorpusTermFrequency(termIndex);
		termCorpusFreq.setTotalCorpusTermFreq(summary_frequency);
		termCorpusFreq.set_termFreqMap(index_frequency_map);

		// Nesting feature: which seeds are contained in which other seeds.
		FeatureTermNest termNest = new FeatureTermNest(termIndex);
		termNest.set_termNested(index_connectIndexSet_Map);

		// Register and run the algorithm.
		AlgorithmTester tester = new AlgorithmTester();
		tester.registerAlgorithm(new CValueAlgorithmYYH(), new CValueFeatureWrapper(termCorpusFreq,termNest));
		tester.execute();
		return tester;
	}

}





